## Bequest receipt probability

## What's the probability of the people in each starting cohort's parents dying each year

## output: by starting cohort, year and parent age, probability of getting a bequest, among those who have not had a bequest
## This will also determine the size of bequest that each cohort gets. Bequest pool is formed for each parent age group.
## Initial split by people who have parents of that age who died.


# Prelims -----------------------------------------------------------------

rm(list=ls())
gc()



# Read HILDA grouped starting cohort ---------------------------------------------------------------

hilda_grouped <- qread("./Input data/Intermediate input data/hilda_grouped_starting_cohort.qs") %>% 
  filter(wavenumber==18)

starting_cohorts <- hilda_grouped %>% 
  mutate(starting_cohort = paste0("SA", age_grp, " SI", total_inc_qtile, " SW", total_wealth_qtile, " SB", bequest_received, " SH", homeowner)) %>% 
  select(xwaveid, hhwte, starting_cohort, parent_age_grp_0=parent_age_grp) 
## parent_age_grp_0 indicates parent age in starting year, as opposed to parent age in the projection year

## plot number of people who have parents in a certain age cohort - unused
# parent_age_plot <- starting_cohorts %>% 
#   filter(!is.na(parent_age_grp_0)) %>% 
#   group_by(parent_age_grp_0) %>% 
#   summarise(n=sum(hhwte)) %>% 
#   ggplot(.) + 
#   geom_col(aes(x=parent_age_grp_0, y=n))


# Read in mortality rates -------------------------------------------------

mortality_rates <- qread("./Input data/mortality_rates_ay.qs")



# Bequest receipt probability -----------------------------

## Calc prob of parents surviving to year 2050 by age group
## Then apply to starting cohorts to get a distribution of probabilities of parent survival each year

## starting parent age groups and projection years
parent_age_grp_0 <- unique(hilda_grouped$parent_age_grp) %>% sort
projection_year_id <- c(1:32)

## calc parent mortality
parent_mortality <- expand_grid(parent_age_grp_0, year=projection_year_id) %>% 
  ## age the parents every 5 years
  mutate(age_increment = ceiling((year+1)/5)-1, ## gets increment relative to current age group (e.g. given the year, age group will be 3 factor levels higher than current age group)
         parent_age_grp = (as.numeric(parent_age_grp_0) + age_increment) %>% 
           ## add factor levels back
           factor(., levels=c(1:21), labels=levels(parent_age_grp_0), ordered=T)
  ) %>% 
  ## it is assumed that no one lives past 5 years of being in the [100-105] age grp. So any NAs of age grp means they are not alive in that year
  ## create mortality_year variable to merge correct mortality rates for the year. ie for first year the cohort is at that age group, use mortality_year 1 rate. For second year, use mortality_year 2 rate, etc
  ## only one death rate availlable for 100-105 yos, make mortality_year=1 to match
  mutate(mortality_year= ifelse(parent_age_grp=="[100,105]", 1,
                                year%%5 +1 )  ) %>% 
  left_join(mortality_rates %>% select(parent_age_grp=age_grp, mortality_year, mortality_rate)) %>% 
  ## edit mortality rate to 1 if age is 100-105 and it is the last time period for that age group (not including 32, the end of our projection period - ppl continue to live after that)
  mutate(mortality_rate = ifelse(parent_age_grp=="[100,105]" & is.na(lead(parent_age_grp)), 1, mortality_rate))

## from child perspective, these mortality rates can be interpreted as probability of receiving a intergen bequest given you have not already

## for each projection year, get parent mortality rates by starting cohort and parent age
bequest_receipt_prob <- lapply(projection_year_id, function(x) {
  
  year_cohorts <- starting_cohorts %>% 
    ## year
    mutate(year = x) %>% 
    ## parent age at that year. if NA, then parents have since passed
    mutate(age_increment = ceiling((year+1)/5)-1, ## gets increment relative to current age group (e.g. given the year, age group will be 3 factor levels higher than current age group)
           parent_age_grp = (as.numeric(parent_age_grp_0) + age_increment) %>% 
             ## add factor levels back
             factor(., levels=c(1:21), labels=levels(parent_age_grp_0), ordered=T)
    ) %>% 
    ## new bequest received id at current year based on missing parent ages
    mutate(bequest_received = ifelse(is.na(parent_age_grp), 1, 0)) %>% 
    ## filter to people who have not previously received bequest, ie parent still alive
    filter(bequest_received==0) %>% 
    ## groups by starting cohort and parent age
    distinct(year, starting_cohort, parent_age_grp, parent_age_grp_0) %>% 
    ## merge in parent mortality from above
    left_join(parent_mortality %>% select(year, parent_age_grp, mortality_rate)) %>% 
    select(-parent_age_grp) %>% 
    arrange(parent_age_grp_0) %>% 
    ## make wide
    mutate(parent_age_grp_0 = paste0("parent_age0_mort_", parent_age_grp_0)) %>% 
    pivot_wider(names_from = parent_age_grp_0, values_from = mortality_rate) %>% 
    arrange(starting_cohort)

}) %>% 
  rbindlist(fill=T)

## check probabilities all <1


## save
qsave(bequest_receipt_prob, "./Input data/bequest_receipt_prob_aiwbh_yp.qs")
## by starting cohort, year and parent age